In [1]:
# Plotting-related Python libraries
import matplotlib.pyplot as plt
from matplotlib import style
# Standard library for reading CSV files
import csv
# Python libraries for manipulating dates and times as objects
import dateutil.parser
import time
import datetime
# Main Python library for numerical calculations
import numpy as np
# SciPy function for the Pearson correlation coefficient and its p value
from scipy.stats import pearsonr
In [2]:
def get_covariance(yfile1, yfile2):
    # Calculate the Pearson correlation coefficient between the CO2 values
    # from two separate files (both arrays must have the same length)
    yfile1_sigma = np.sqrt(np.var(yfile1))
    yfile2_sigma = np.sqrt(np.var(yfile2))
    yfile1_mean = np.mean(yfile1)
    yfile2_mean = np.mean(yfile2)
    files1and2_sum = 0
    for i in range(len(yfile1)):
        files1and2_sum += (yfile1[i] - yfile1_mean) * (yfile2[i] - yfile2_mean)
    correlation_coef = files1and2_sum / (yfile1_sigma * yfile2_sigma) / len(yfile1)
    return correlation_coef
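As a quick sanity check (not part of the original notebook), the hand-rolled coefficient can be compared against NumPy's built-in np.corrcoef on a small pair of made-up arrays; the two values should agree to floating-point precision. The example values below are illustrative only.
In [ ]:
# Sanity check (illustrative): compare against NumPy's built-in Pearson r
x = np.array([400.0, 410.0, 415.0, 430.0, 450.0])
y = np.array([402.0, 412.0, 418.0, 428.0, 455.0])
print(get_covariance(x, y))        # hand-rolled Pearson r
print(np.corrcoef(x, y)[0, 1])     # NumPy's Pearson r, should match closely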
In [ ]:
# User inputs name of first file
user_file = input("File Name 1: ")
# Store time and CO2 raw data from first file
results = csv.reader(open(user_file), delimiter=',')
times = []
CO2 = []
row_counter = 0
for r in results:
    row_counter += 1
    # Skip the header row
    if row_counter > 1:
        times.append(dateutil.parser.parse(r[0]))
        CO2.append(int(r[1]))
# Merge n CO2 data points
n_merge = int(input("n data points to combine: "))
ndata = len(CO2)
nsum_data = int(ndata / n_merge)
data_ave = []
data_unc = []
merge_times = []
# Calculate mean and standard deviation of each group of n_merge points,
# and take the middle timestamp of each group to match the merged data
for i in range(nsum_data):
    idata = CO2[i*n_merge:(i+1)*n_merge]
    idata_array = np.asarray(idata)
    CO2mean = np.mean(idata_array)
    CO2sigma = np.sqrt(np.var(idata_array))
    data_ave.append(CO2mean)
    data_unc.append(CO2sigma)
    itimes = times[i*n_merge:(i+1)*n_merge]
    itime = itimes[int(len(itimes)/2)]
    merge_times.append(itime)
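For long files, the same block averaging can be done without an explicit Python loop by reshaping the raw data into rows of n_merge points. This is an optional NumPy-only sketch, not part of the original analysis; it assumes the data are first trimmed to a whole number of groups, exactly as the loop above does.
In [ ]:
# Optional vectorized equivalent of the merging loop above (illustrative sketch)
CO2_trimmed = np.asarray(CO2[:nsum_data * n_merge], dtype=float)
grouped = CO2_trimmed.reshape(nsum_data, n_merge)
data_ave_vec = grouped.mean(axis=1)    # matches data_ave
data_unc_vec = grouped.std(axis=1)     # matches data_unc (population standard deviation)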
In [ ]:
# User inputs name of second file
user_file_2 = input("File Name 2: ")
# Store time and CO2 raw data from second file
results_2 = csv.reader(open(user_file_2), delimiter=',')
times_2 = []
CO2_2 = []
row_counter = 0
for r in results_2:
    row_counter += 1
    # Skip the header row
    if row_counter > 1:
        times_2.append(dateutil.parser.parse(r[0]))
        CO2_2.append(int(r[1]))
# Merge n CO2 data points
n_merge_2 = int(input("n data points to combine: "))
ndata_2 = len(CO2_2)
nsum_data_2 = int(ndata_2 / n_merge_2)
data_ave_2 = []
data_unc_2 = []
merge_times_2 = []
# Calculate mean and standard deviation of each group of n_merge_2 points,
# and take the middle timestamp of each group to match the merged data
for i in range(nsum_data_2):
    idata_2 = CO2_2[i*n_merge_2:(i+1)*n_merge_2]
    idata_array_2 = np.asarray(idata_2)
    CO2mean_2 = np.mean(idata_array_2)
    CO2sigma_2 = np.sqrt(np.var(idata_array_2))
    data_ave_2.append(CO2mean_2)
    data_unc_2.append(CO2sigma_2)
    itimes_2 = times_2[i*n_merge_2:(i+1)*n_merge_2]
    itime_2 = itimes_2[int(len(itimes_2)/2)]
    merge_times_2.append(itime_2)
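The two cells above repeat the same loading-and-merging steps, so they could be collapsed into a single helper. The sketch below is one possible refactoring, not the original code; the name load_and_merge is made up, and it assumes each CSV has a one-line header followed by timestamp,CO2 rows as in the cells above.
In [ ]:
def load_and_merge(filename, n_merge):
    # Read (timestamp, CO2) rows after a one-line header, then block-average
    times, CO2 = [], []
    with open(filename) as f:
        reader = csv.reader(f, delimiter=',')
        next(reader)                       # skip header
        for r in reader:
            times.append(dateutil.parser.parse(r[0]))
            CO2.append(int(r[1]))
    data_ave, data_unc, merge_times = [], [], []
    for i in range(len(CO2) // n_merge):
        chunk = np.asarray(CO2[i*n_merge:(i+1)*n_merge])
        data_ave.append(np.mean(chunk))
        data_unc.append(np.sqrt(np.var(chunk)))
        merge_times.append(times[i*n_merge:(i+1)*n_merge][n_merge // 2])
    return merge_times, data_ave, data_unc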
In [ ]:
# Convert data averages into arrays
CO2file1_array = np.asarray(data_ave)
CO2file2_array = np.asarray(data_ave_2)
# Note: both arrays must have the same length for the correlation to be defined
correlation_coef = get_covariance(CO2file1_array, CO2file2_array)
# Print correlation coefficient to terminal
print("Correlation: {}".format(correlation_coef))
# Print length of data averages to terminal
print("Length of file 1 data: {}, Length of file 2 data: {}".format(len(data_ave), len(data_ave_2)))
# Calculate and print p value to terminal
correlation_values = pearsonr(CO2file1_array, CO2file2_array)
print("p value =", correlation_values[1])
In [ ]:
# Plot time vs CO2 data
fig = plt.figure()
plt.plot(merge_times, data_ave, "b.", label="File 1")
plt.plot(merge_times_2, data_ave_2, "g.", label="File 2")
plt.errorbar(merge_times, data_ave, yerr=data_unc)
plt.errorbar(merge_times_2, data_ave_2, yerr=data_unc_2)
plt.xlabel("Time")
plt.ylabel("CO2 (ppm)")
plt.legend()
plt.grid(True, color='k')
fig.autofmt_xdate()
# Plot CO2 correlation between files
fig = plt.figure()
plt.plot(CO2file1_array, CO2file2_array, "b.")
plt.xlabel("CO2 file 1")
plt.ylabel("CO2 file 2")
# Print correlation coefficient and p value on the CO2 correlation figure
corr_statement = "Correlation Coefficient = {}".format(correlation_coef)
p_value = "P Value = {}".format(correlation_values[1])
plt.annotate(corr_statement, xy=(0, 1), xytext=(12, -12), va='top',
             xycoords='axes fraction', textcoords='offset points')
plt.annotate(p_value, xy=(0, 0.94), xytext=(12, -12), va='top',
             xycoords='axes fraction', textcoords='offset points')
plt.show()
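If the figures are needed outside the notebook, they can also be written to disk with Matplotlib's savefig. The filename below is an assumption; note that fig refers to the second (correlation) figure, since the variable was reused above.
In [ ]:
# Optional (illustrative): save the correlation figure; the filename is an assumption
fig.savefig("co2_correlation.png", dpi=200, bbox_inches="tight")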